{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from fastparquet import ParquetFile\n",
    "from fastparquet import write\n",
    "import pyarrow.parquet as pq\n",
    "\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "\n",
    "# Render matplotlib figures inline in the notebook\n",
    "%matplotlib inline\n",
    "plt.style.use('ggplot')\n",
    "plt.rcParams['figure.figsize'] = (10, 7)  # width, height in inches\n",
    "\n",
    "import sys\n",
    "\n",
    "# Local data directory; no other cell shown here reads `path` (TODO confirm it is still needed)\n",
    "path = '/Users/mithursan/data'\n",
    "# Make modules under ../src importable from this notebook\n",
    "sys.path.append('../src')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def prepare_data_kibot(raw_data_path, out_data_path):\n",
    "    \"\"\"Convert a raw Kibot tick CSV into a parquet file indexed by timestamp.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    raw_data_path : str\n",
    "        Header-less CSV whose columns are day, time, price, bid, ask, vol\n",
    "        (day formatted as %m/%d/%Y, time as %H:%M:%S).\n",
    "    out_data_path : str\n",
    "        Destination parquet file, written with fastparquet's ``write``.\n",
    "\n",
    "    The stored frame is indexed by ``date`` and holds price, bid, ask, vol\n",
    "    and dollar_vol (price * vol). Nothing is returned.\n",
    "    \"\"\"\n",
    "    ticks = pd.read_csv(raw_data_path,\n",
    "                        header=None,\n",
    "                        names=['day', 'time', 'price', 'bid', 'ask', 'vol'])\n",
    "    ticks['date'] = pd.to_datetime(ticks['day'] + ticks['time'],\n",
    "                                   format='%m/%d/%Y%H:%M:%S')\n",
    "    ticks['dollar_vol'] = ticks['price'] * ticks['vol']\n",
    "    ticks = ticks.drop(['day', 'time'], axis=1)\n",
    "    # De-duplicate while 'date' is still a column: drop_duplicates() ignores\n",
    "    # the index, so calling it after set_index('date') would also discard\n",
    "    # distinct ticks that share price/bid/ask/vol at different timestamps.\n",
    "    ticks = ticks.drop_duplicates()\n",
    "    ticks = ticks.set_index('date')\n",
    "    write(out_data_path, ticks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the raw IVE tick file to parquet\n",
    "raw_data_path = '/Users/mithursan/data/raw/IVE_tickbidask.txt'\n",
    "out_data_path = '/Users/mithursan/IVE_tickbidask.parq'\n",
    "prepare_data_kibot(raw_data_path=raw_data_path,\n",
    "                   out_data_path=out_data_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Convert the raw WDC tick file to parquet\n",
    "raw_data_path = '/Users/mithursan/data/raw/WDC_tickbidask.txt'\n",
    "out_data_path = '/Users/mithursan/WDC_tickbidask.parq'\n",
    "prepare_data_kibot(raw_data_path=raw_data_path,\n",
    "                   out_data_path=out_data_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data Cleaning: reload the prepared IVE ticks for the price box plot in the next cell\n",
    "from fastparquet import ParquetFile\n",
    "\n",
    "out_data_path = '/Users/mithursan/IVE_tickbidask.parq'\n",
    "pf = ParquetFile(out_data_path)  # handle on the parquet file\n",
    "df = pf.to_pandas()              # materialise it as a pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAGfCAYAAABsocdzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFRpJREFUeJzt3V+InPXd9/HPZreKm5DNZsc/7Kal\npkaeKmotEYNik8Y5KNryhB4EalNIPZAaSEARDEI98WR7EJOGGjxo0VIfKD1oUir2ZEmTQkJgNQo2\ntlZBSs1a4mY3mjVWTTL3gbj37XPHZt3st5NJXq8jZ7yuub47sMPb32+8tqvVarUCAMCcmtfuAQAA\nLkQiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKBAT7sH+MTY2Fi7RwAu\nMI1GI+Pj4+0eA7jADA4Ozug4K1kAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQ\nWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFCg52wH7NixIwcPHkxfX1+2bNmSJPnVr36V\nF154IT09PbnyyiuzYcOGzJ8/P0myc+fO7N69O/PmzcsPf/jDfO1rX6v9CQAAzkNnjaxVq1blW9/6\nVp544onp52688cbcc8896e7uzjPPPJOdO3dm3bp1efPNN7N///48/vjjmZyczGOPPZaf/vSnmTfP\nghnw7w0NDbV7hBk7fPhwu0cAOsBZI+u6667LkSNHPvXcTTfdNP3P1157bQ4cOJAkGR0dzW233ZYv\nfOELueKKK3LVVVfl9ddfz7XXXjvHYwMXmopwGRoaEkRA25zzEtPu3buntwQnJiYyMDAw/e8WL16c\niYmJc70EAEDHOetK1r/z29/+Nt3d3bnjjjuSJK1Wa8bnjoyMZGRkJEkyPDycRqNxLqMAnJHPFqBd\nZh1Ze/bsyQsvvJBHH300XV1dSZKBgYEcPXp0+piJiYksXrz4jOc3m800m83px+Pj47MdBeAz+WwB\n5trg4OCMjpvVduFLL72U3/3ud3n44Ydz6aWXTj+/fPny7N+/Px999FGOHDmSt956K9dcc81sLgEA\n0NG6WmfZ49u2bVteeeWVHD9+PH19fVm7dm127tyZkydPZsGCBUmSZcuW5b777kvy8RbiH//4x8yb\nNy/r16/PzTffPKNBxsbGzvFHAfg0X3wHKsx0JeuskfWfIrKAuSaygAql24UAAPx7IgsAoIDIAgAo\nILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAo\nILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAo\nILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAo\nILIAAAqILACAAiILAKCAyAIAKNDT7gGAznP99dfn2LFj7R5jRoaGhto9wlktWrQohw4davcYwBwT\nWcDnduzYsRw+fLjdY5xVo9HI+Ph4u8c4q04IQeDzs10IAFBAZAEAFBBZAAAFzvqdrB07duTgwYPp\n6+vLli1bkiRTU1PZunVr3n777Vx++eV54IEHsmDBgrRarTz11FN58cUXc+mll2bDhg1ZunRp+Q8B\nAHC+OetK1qpVq/LII4986rldu3blhhtuyPbt23PDDTdk165dSZIXX3wx//znP7N9+/bcd999+fnP\nf14zNQDAee6skXXddddlwYIFn3pudHQ0K1euTJKsXLkyo6OjSZLnn38+3/jGN9LV1ZVrr7027733\nXiYnJwvGBgA4v83qO1nvvPNO+vv7ky
T9/f159913kyQTExNpNBrTxw0MDGRiYmIOxgQA6Cxzep+s\nVqv1v57r6uo647EjIyMZGRlJkgwPD38qzoDzXyf8zvb09HTEnElnvJ/A5zOryOrr68vk5GT6+/sz\nOTmZhQsXJvl45ep/3vjv6NGj0yte/79ms5lmszn9uBNuGAj8t074ne2Um5EmnfF+Ah8bHByc0XGz\n2i5cvnx59u7dmyTZu3dvbrnllunn//SnP6XVauVvf/tbent7PzOyAAAuZGddydq2bVteeeWVHD9+\nPD/60Y+ydu3arFmzJlu3bs3u3bvTaDTy4IMPJkluvvnmHDx4MJs2bcoll1ySDRs2lP8AAADno67W\nmb5I1QZjY2PtHgGYoaGhIX+7cA51yvsJfKx0uxAAgH9PZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQ\nWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQ\nWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFulqtVqvdQyTJ2NhYu0cA\nZuj//r+/tnuEC87vvv9/2j0CMEODg4MzOk5kAZ/b0NBQDh8+3O4xzqrRaGR8fLzdY5xVp7yfwMdm\nGlm2CwEACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgC\nACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKNBzLic/++yz2b17d7q6uvLF\nL34xGzZsyLFjx7Jt27ZMTU3l6quvzsaNG9PTc06XAQDoOLNeyZqYmMgf/vCHDA8PZ8uWLTl9+nT2\n79+fZ555JnfffXe2b9+e+fPnZ/fu3XM5LwBARzin7cLTp0/nww8/zKlTp/Lhhx9m0aJFOXToUFas\nWJEkWbVqVUZHR+dkUACATjLrfbzFixfnO9/5Tu6///5ccskluemmm7J06dL09vamu7t7+piJiYkz\nnj8yMpKRkZEkyfDwcBqNxmxHAdqgE35ne3p6OmLOpDPeT+DzmXVkTU1NZXR0NE888UR6e3vz+OOP\n56WXXprx+c1mM81mc/rx+Pj4bEcB2qATfmcbjUZHzJl0xvsJfGxwcHBGx806sl5++eVcccUVWbhw\nYZLk1ltvzauvvpoTJ07k1KlT6e7uzsTERBYvXjzbSwAAdKxZfyer0WjktddeywcffJBWq5WXX345\nS5YsyfXXX58DBw4kSfbs2ZPly5fP2bAAAJ1i1itZy5Yty4oVK/Lwww+nu7s7X/7yl9NsNvP1r389\n27Zty69//etcffXVWb169VzOCwDQEbparVar3UMkydjYWLtHAGZoaGgohw8fbvcYZ9Up38nqlPcT\n+NhMv5Plju8AAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUA\nUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUA\nUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUA\nUEBkAQAUEFkAAAV62j0A0JmGhobaPcIFY9GiRe0eASggsoDP7fDhw+0eYUaGhoY6ZlbgwmO7EACg\ngMgCACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKDAOd3x/b333suTTz6Zf/zjH+nq\n6sr999+fwcHBbN26NW+//XYuv/zyPPDAA1mwYMFczQsA0BG6Wq1Wa7Yn/+xnP8tXv/rV3HnnnTl5\n8mQ++OCD7Ny5MwsWLMiaNWuya9euTE1NZd26dWd9rbGxsdmOAXBG/qwOUGFwcHBGx816u/DEiRP5\ny1
/+ktWrVydJenp6Mn/+/IyOjmblypVJkpUrV2Z0dHS2lwAA6Fiz3i48cuRIFi5cmB07duTvf/97\nli5dmvXr1+edd95Jf39/kqS/vz/vvvvuGc8fGRnJyMhIkmR4eDiNRmO2owB8Jp8tQLvMOrJOnTqV\nN954I/fee2+WLVuWp556Krt27Zrx+c1mM81mc/rx+Pj4bEcB+Ew+W4C5Vr5dODAwkIGBgSxbtixJ\nsmLFirzxxhvp6+vL5ORkkmRycjILFy6c7SUAADrWrCNr0aJFGRgYmP7C+ssvv5wlS5Zk+fLl2bt3\nb5Jk7969ueWWW+ZmUgCADnJOt3C49957s3379pw8eTJXXHFFNmzYkFarla1bt2b37t1pNBp58MEH\n52pWAICOcU63cJhLbuEAzDW3cAAqlH8nCwCAzyayAAAKiCwAgAIiCwCggMgCACggsgAACogsAIAC\nIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgCACggsgAACogsAIAC\nIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgCACggsgAACogsAIAC\nIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgCACggsgAACogsAIAC\nIgsAoIDIAgAoILIAAAqILACAAiILAKBAz7m+wOnTp7N58+YsXrw4mzdvzpEjR7Jt27ZMTU3l6quv\nzsaNG9PTc86XAQDoKOe8kvXcc89laGho+vEzzzyTu+++O9u3b8/8+fOze/fuc70EAEDHOafIOnr0\naA4ePJg777wzSdJqtXLo0KGsWLEiSbJq1aqMjo6e+5QAAB3mnPbxnn766axbty7vv/9+kuT48ePp\n7e1Nd3d3kmTx4sWZmJg447kjIyMZGRlJkgwPD6fRaJzLKABn5LMFaJdZR9YLL7yQvr6+LF26NIcO\nHfrc5zebzTSbzenH4+Pjsx0F4DP5bAHm2uDg4IyOm3Vkvfrqq3n++efz4osv5sMPP8z777+fp59+\nOidOnMipU6fS3d2diYmJLF68eLaXAADoWLOOrHvuuSf33HNPkuTQoUP5/e9/n02bNuXxxx/PgQMH\ncvvtt2fPnj1Zvnz5nA0LANAp5vw+Wd///vfz7LPPZuPGjZmamsrq1avn+hIAAOe9rlar1Wr3EEky\nNjbW7hGAC8zQ0FAOHz7c7jGAC8xMv5Plju8AAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQ\nQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQ\nQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAV62j0AQJIMDQ11zOsePnx4zl8TuPCILOC8UBEu\njUYj4+Pjc/66ADNhuxAAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgC\nACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCgQM9s\nTxwfH88TTzyRY8eOpaurK81mM3fddVempqaydevWvP3227n88svzwAMPZMGCBXM5MwDAeW/WkdXd\n3Z0f/OAHWbp0ad5///1s3rw5N954Y/bs2ZMbbrgha9asya5du7Jr166sW7duLmcGADjvzXq7sL+/\nP0uXLk2SXHbZZRkaGsrExERGR0ezcuXKJMnKlSszOjo6N5MCAHSQOflO1pEjR/LGG2/kmmuuyTvv\nvJP+/v4kH4fYu+++OxeXAADoKLPeLvzEv/71r2zZsiXr169Pb2/vjM8bGRnJyMhIkmR4eDiNRuNc\nRwH4lJ6eHp8tQNucU2SdPHkyW7ZsyR133JFbb701SdLX15fJycn0
9/dncnIyCxcuPOO5zWYzzWZz\n+vH4+Pi5jALwvzQaDZ8twJwbHByc0XGz3i5stVp58sknMzQ0lG9/+9vTzy9fvjx79+5Nkuzduze3\n3HLLbC8BANCxulqtVms2J/71r3/No48+mi996Uvp6upKknzve9/LsmXLsnXr1oyPj6fRaOTBBx+c\n0S0cxsbGZjMGwGeykgVUmOlK1qwja66JLGCuiSygQvl2IQAAn01kAQAUEFkAAAVEFgBAAZEFAFBA\nZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBA\nZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBA\nZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBA\nZAEAFBBZAAAFRBYAQAGRBQBQQGQBABToafcAAHNtyZIlabVa04+7urry5ptvtnEi4GJkJQu4oHwS\nWL29vTlw4EB6e3vTarWyZMmSdo8GXGTKVrJeeumlPPXUUzl9+nTuvPPOrFmzpupSANM+CazXXnst\njUYjr732WpYtW5YTJ060ezTgIlOyknX69On84he/yCOPPJKtW7dm3759luqB/5iNGzdm9erVueyy\ny7J69eps3Lix3SMBF6GSlazXX389V111Va688sokyW233ZbR0VHL9cB/xE9+8pP85je/yV133ZXn\nnnsua9eubfdIwEWoZCVrYmIiAwMD048HBgYyMTFRcSmAM1q/fn3+/Oc/Z/369e0eBbhIlaxk/c//\nq+cTXV1dn3o8MjKSkZGRJMnw8HAajUbFKMBFZt68j//b8cSJE1mxYsWnnvM5A/wnlUTWwMBAjh49\nOv346NGj6e/v/9QxzWYzzWZz+vH4+HjFKMBFZtmyZXnsscdy++23p9FoZHx8PPv27cuPf/xjnzPA\nnBgcHJzRcSXbhV/5ylfy1ltv5ciRIzl58mT279+f5cuXV1wK4FM2bdqUhx56KPv27ctHH32Uffv2\n5aGHHsqmTZvaPRpwkelqnWlvbw4cPHgwv/zlL3P69Ol885vfzHe/+91/e/zY2FjFGMBFaNeuXdm+\nffv07Rs2bdrkNjLAnJnpSlZZZH1eIguYa59sFwLMpbZuFwIAXOxEFgBAAZEFAFBAZAEAFBBZAAAF\nRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUOG/+QDQAwIXEShZwwdq8eXO7RwAuYiIL\nAKCAyAIAKCCygAtWs9ls9wjARcwX3wEACljJAgAo0NPuAQDm2o4dO3Lw4MH09fVly5Yt7R4HuEhZ\nyQIuOKtWrcojjzzS7jGAi5zIAi441113XRYsWNDuMYCLnMgCACggsgAACogsAIACIgsAoICbkQIX\nnG3btuWVV17J8ePH09fXl7Vr12b16tXtHgu4yIgsAIACtgsBAAqILACAAiILAKCAyAIAKCCyAAAK\niCwAgAIiCwCggMgCACjwX/2YfpT8DjGwAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 720x504 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of trade prices. The title is taken from out_data_path (the file\n",
    "# the preceding cell loaded into df) so the figure identifies its data set.\n",
    "fig, ax = plt.subplots()\n",
    "ax.set_title('Price box plot: ' + out_data_path.split('/')[-1])\n",
    "ax.set_ylabel('price')\n",
    "_ = ax.boxplot(df['price'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Reload the prepared WDC ticks for the price box plot in the next cell\n",
    "from fastparquet import ParquetFile\n",
    "\n",
    "out_data_path = '/Users/mithursan/WDC_tickbidask.parq'\n",
    "pf = ParquetFile(out_data_path)  # handle on the parquet file\n",
    "df = pf.to_pandas()              # materialise it as a pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAGfCAYAAABsocdzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAET1JREFUeJzt3U9onPedx/GPViKlsrAseZyDJi21\nWpclwW0pNjGUVsadU9OyOQWauBByKMTggt1DTQ496+LIGGJyaHEOWeitWhraizBWDyWg1G4xTpP2\nYEqRKLYi+Y9iN8b27MHU2+y6a1mab8ejvF63GZ5H8/UDM7z9/J55pq/dbrcDAEBH/Vu3BwAA2IhE\nFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAECBgW4P8HcLCwvdHgHYYBqN\nRhYXF7s9BrDBjI2NrWo7Z7IAAAqILACAAiILAKCAyAIAKCCyAAAKiCwAgAIiCwCggMgCACggsgAA\nCogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKDDQ7QEAkqTZbHZ7hFWbn5/v9ghADxBZ\nwCOhIlyazaYgArrGciEAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEF\nAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEF\nAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEF\nAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEF\nAFBAZAEAFBBZAAAFRBYAQIGBB21w4sSJnDlzJsPDwzl69GiSZGVlJVNTU7l06VK2bduWQ4cOZWho\nKO12OydPnszZs2fzqU99KgcOHMj4+Hj5PwIA4FHzwDNZe/fuzSuvvPKx56anp7Nz584cP348O3fu\nzPT0dJLk7Nmz+etf/5rjx4/n+9//fn7yk5/UTA0A8Ih7YGQ9+eSTGRoa+thzc3NzmZiYSJJMTExk\nbm4uSfLOO+/kG9/4Rvr6+vLFL34xH374YZaXlwvGBgB4tK3pmqwrV65kZGQkSTIyMpKrV68mSZaW\nltJoNO5tt3Xr1iwtLXVgTACA3vLAa7IeRrvd/j/P9fX13XfbmZmZzMzMJEkmJyc/FmcAneKzBeiW\nNUXW8PBwlpeXMzIykuXl5WzevDnJ3TNXi4uL97b74IMP7p3x+t9arVZarda9x/+4H0Cn+GwBOm1s\nbGxV261puXDXrl2ZnZ1NkszOzmb37t33nv/1r3+ddrudP/7xjxkcHPynkQUAsJH1te+3xvcPjh07\nlnfffTfXrl3L8PBwnnvuuezevTtTU1NZXFxMo9HI4cOH793C4ac//Wl+//vf57HHHsuBAwfy+c9/\nflWDLCwsdOQfBPB3zWYz8/Pz3R4D2GBWeybrgZH1ryKygE4TWUCF0uVCAAD+fyILAKCAyAIAKCCy\nAAAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCy\nAAAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCy\nAAAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCy\nAAAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKCAyAIAKCCy\nAAAKDHR7AKD3PPXUU7l8+XK3x1iVZr
PZ7REeaMuWLTl//ny3xwA6TGQBD+3y5cuZn5/v9hgP1Gg0\nsri42O0xHqgXQhB4eJYLAQAKiCwAgAIiCwCggMgCACggsgAACogsAIACIgsAoIDIAgAoILIAAAqI\nLACAAiILAKCAyAIAKLCuH4h+6623curUqfT19eUzn/lMDhw4kMuXL+fYsWNZWVnJ9u3bc/DgwQwM\n+B1qAOCTZc1nspaWlvKrX/0qk5OTOXr0aO7cuZPf/OY3efPNN/PMM8/k+PHj2bRpU06dOtXJeQEA\nesK6lgvv3LmTmzdv5vbt27l582a2bNmS8+fPZ8+ePUmSvXv3Zm5uriODAgD0kjWv442OjuY73/lO\nXn755Tz22GP58pe/nPHx8QwODqa/v//eNktLS/fdf2ZmJjMzM0mSycnJNBqNtY4CdEEvvGcHBgZ6\nYs6kN44n8HDWHFkrKyuZm5vLa6+9lsHBwbz66qv53e9+t+r9W61WWq3WvceLi4trHQXogl54zzYa\njZ6YM+mN4wncNTY2tqrt1hxZ586dy+OPP57NmzcnSZ5++um8//77uX79em7fvp3+/v4sLS1ldHR0\nrS8BANCz1nxNVqPRyJ/+9Kd89NFHabfbOXfuXJ544ok89dRTefvtt5Mkp0+fzq5duzo2LABAr1jz\nmawdO3Zkz549+dGPfpT+/v587nOfS6vVyle/+tUcO3YsP/vZz7J9+/bs27evk/MCAPSEvna73e72\nEEmysLDQ7RGAVWo2m5mfn+/2GA/UK9dk9crxBO5a7TVZ7vgOAFBAZAEAFBBZAAAFRBYAQAGRBQBQ\nQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQ\nQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQ\nQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQ\nQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQ\nQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAF+trtdrvbQyTJ\nwsJCt0cAVuk//vO9bo+w4fzXC//e7RGAVRobG1vVdiILeGjNZjPz8/PdHuOBGo1GFhcXuz3GA/XK\n8QTuWm1kWS4EACgwsJ6dP/zww7z++uv5y1/+kr6+vrz88ssZGxvL1NRULl26lG3btuXQoUMZGhrq\n1LwAAD1hXZF18uTJfOUrX8kPf/jD3Lp1Kx999FF+/vOfZ+fOnXn22WczPT2d6enp7N+/v1PzAgD0\nhDUvF16/fj1/+MMfsm/fviTJwMBANm3alLm5uUxMTCRJJiYmMjc315lJAQB6yJrPZF28eDGbN2/O\niRMn8uc//znj4+N58cUXc+XKlYyMjCRJRkZGcvXq1fvuPzMzk5mZmSTJ5ORkGo3GWkcBuqAX3rMD\nAwM9MWfSG8cTeDhrjqzbt2/nwoULeemll7Jjx46cPHky09PTq96/1Wql1Wrde9wL3wAC/kcvvGd7\n5duFSW8cT+Cu8m8Xbt26NVu3bs2OHTuSJHv27MmFCxcyPDyc5eXlJMny8nI2b9681pcAAOhZa46s\nLVu2ZOvWrffub3Xu3Lk88cQT2bVrV2ZnZ5Mks7Oz2b17d2cmBQDoIev6duFLL72U48eP59atW3n8\n8cdz4MCBtNvtTE1N5dSpU2k0Gjl8+HCnZgUA6Bnu+A48tF65Q3mvXJPVK8cTuMsd3wEAukhkAQAU\nEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAU\nEF
kAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAU\nEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAU\nEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAU\nEFkAAAVEFgBAAZEFAFBAZAEAFBBZAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAU\nEFkAAAVEFgBAAZEFAFBgoNsDAL2p2Wx2e4QNY8uWLd0eASiw7si6c+dOjhw5ktHR0Rw5ciQXL17M\nsWPHsrKyku3bt+fgwYMZGNBysJHMz893e4RVaTabPTMrsPGse7nwl7/85cf+R/vmm2/mmWeeyfHj\nx7Np06acOnVqvS8BANBz1hVZH3zwQc6cOZNvfvObSZJ2u53z589nz549SZK9e/dmbm5u/VMCAPSY\nda3jvfHGG9m/f39u3LiRJLl27VoGBwfT39+fJBkdHc3S0tJ9952ZmcnMzEySZHJyMo1GYz2jANyX\nzxagW9YcWb/97W8zPDyc8fHxnD9//qH3b7VaabVa9x4vLi6udRSAf8pnC9BpY2Njq9puzZH1/vvv\n55133snZs2dz8+bN3LhxI2+88UauX7+e27dvp7+/P0tLSxkdHV3rSwAA9Kw1R9bzzz+f559/Pkly\n/vz5/OIXv8gPfvCDvPrqq3n77bfzta99LadPn86uXbs6NiwAQK/o+M1IX3jhhbz11ls5ePBgVlZW\nsm/fvk6/BADAI6+v3W63uz1EkiwsLHR7BGCDcZ8soMJqr8nyszoAAAVEFgBAAZEFAFBAZAEAFBBZ\nAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZ\nAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZ\nAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZ\nAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAVEFgBAAZEFAFBAZAEAFBBZ\nAAAFRBYAQAGRBQBQQGQBABQQWQAABUQWAEABkQUAUEBkAQAUEFkAAAUGuj0AQJI0m82e+bvz8/Md\n/5vAxiOygEdCRbg0Go0sLi52/O8CrIblQgCAAiILAKCAyAIAKLDma7IWFxfz2muv5fLly+nr60ur\n1cq3vvWtrKysZGpqKpcuXcq2bdty6NChDA0NdXJmAIBH3pojq7+/P9/73vcyPj6eGzdu5MiRI/nS\nl76U06dPZ+fOnXn22WczPT2d6enp7N+/v5MzAwA88ta8XDgyMpLx8fEkyac//ek0m80sLS1lbm4u\nExMTSZKJiYnMzc11ZlIAgB7SkVs4XLx4MRcuXMgXvvCFXLlyJSMjI0nuhtjVq1fvu8/MzExmZmaS\nJJOTk2k0Gp0YBeCegYEBny1A16w7sv72t7/l6NGjefHFFzM4OLjq/VqtVlqt1r3H7mUDdJr7ZAEV\nxsbGVrXdur5deOvWrRw9ejRf//rX8/TTTydJhoeHs7y8nCRZXl7O5s2b1/MSAAA9ac2R1W638/rr\nr6fZbObb3/72ved37dqV2dnZJMns7Gx27969/ikBAHpMX7vdbq9lx/feey8//vGP89nPfjZ9fX1J\nku9+97vZsWNHpqamsri4mEajkcOHD6/qFg4LCwtrGQPgn7JcCFRY7XLhmiOr00QW0GkiC6jwL7km\nCwCA+xNZAAAFRBYAQIFH5posAICNxJksYMM6cuRIt0cAPsFEFgBA
AZEFAFBAZAEb1j/+PirAv5oL\n3wEACjiTBQBQYKDbAwB02okTJ3LmzJkMDw/n6NGj3R4H+IRyJgvYcPbu3ZtXXnml22MAn3AiC9hw\nnnzyyQwNDXV7DOATTmQBABQQWQAABUQWAEABkQUAUMDNSIEN59ixY3n33Xdz7dq1DA8P57nnnsu+\nffu6PRbwCSOyAAAKWC4EACggsgAACogsAIACIgsAoIDIAgAoILIAAAqILACAAiILAKDAfwNPbjqb\n+yWd7wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 720x504 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of trade prices. The title is taken from out_data_path (the file\n",
    "# the preceding cell loaded into df) so the figure identifies its data set.\n",
    "fig, ax = plt.subplots()\n",
    "ax.set_title('Price box plot: ' + out_data_path.split('/')[-1])\n",
    "ax.set_ylabel('price')\n",
    "_ = ax.boxplot(df['price'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data Loading: read the prepared WDC ticks back from parquet\n",
    "from fastparquet import ParquetFile\n",
    "\n",
    "out_data_path = '/Users/mithursan/WDC_tickbidask.parq'\n",
    "pf = ParquetFile(out_data_path)  # handle on the parquet file\n",
    "df = pf.to_pandas()              # materialise it as a pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price</th>\n",
       "      <th>bid</th>\n",
       "      <th>ask</th>\n",
       "      <th>vol</th>\n",
       "      <th>dollar_vol</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2010-02-17 09:30:01</th>\n",
       "      <td>42.8700</td>\n",
       "      <td>42.70</td>\n",
       "      <td>43.01</td>\n",
       "      <td>1161</td>\n",
       "      <td>49772.07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-17 09:30:10</th>\n",
       "      <td>42.8700</td>\n",
       "      <td>42.70</td>\n",
       "      <td>42.98</td>\n",
       "      <td>116</td>\n",
       "      <td>4972.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-17 09:30:12</th>\n",
       "      <td>42.9400</td>\n",
       "      <td>42.78</td>\n",
       "      <td>42.95</td>\n",
       "      <td>25800</td>\n",
       "      <td>1107852.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-17 09:30:12</th>\n",
       "      <td>42.9400</td>\n",
       "      <td>42.78</td>\n",
       "      <td>42.94</td>\n",
       "      <td>9</td>\n",
       "      <td>386.46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-17 09:30:12</th>\n",
       "      <td>42.9384</td>\n",
       "      <td>42.84</td>\n",
       "      <td>42.93</td>\n",
       "      <td>300</td>\n",
       "      <td>12881.52</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       price    bid    ask    vol  dollar_vol\n",
       "date                                                         \n",
       "2010-02-17 09:30:01  42.8700  42.70  43.01   1161    49772.07\n",
       "2010-02-17 09:30:10  42.8700  42.70  42.98    116     4972.92\n",
       "2010-02-17 09:30:12  42.9400  42.78  42.95  25800  1107852.00\n",
       "2010-02-17 09:30:12  42.9400  42.78  42.94      9      386.46\n",
       "2010-02-17 09:30:12  42.9384  42.84  42.93    300    12881.52"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the frame loaded in the previous cell\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Data Loading: read the prepared IVE ticks back from parquet\n",
    "from fastparquet import ParquetFile\n",
    "\n",
    "out_data_path = '/Users/mithursan/IVE_tickbidask.parq'\n",
    "pf = ParquetFile(out_data_path)  # handle on the parquet file\n",
    "df = pf.to_pandas()              # materialise it as a pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>price</th>\n",
       "      <th>bid</th>\n",
       "      <th>ask</th>\n",
       "      <th>vol</th>\n",
       "      <th>dollar_vol</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2009-09-28 09:30:00</th>\n",
       "      <td>50.79</td>\n",
       "      <td>50.70</td>\n",
       "      <td>50.79</td>\n",
       "      <td>100</td>\n",
       "      <td>5079.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009-09-28 09:30:00</th>\n",
       "      <td>50.71</td>\n",
       "      <td>50.70</td>\n",
       "      <td>50.79</td>\n",
       "      <td>638</td>\n",
       "      <td>32352.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009-09-28 09:31:32</th>\n",
       "      <td>50.75</td>\n",
       "      <td>50.75</td>\n",
       "      <td>50.76</td>\n",
       "      <td>100</td>\n",
       "      <td>5075.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009-09-28 09:31:33</th>\n",
       "      <td>50.75</td>\n",
       "      <td>50.72</td>\n",
       "      <td>50.75</td>\n",
       "      <td>100</td>\n",
       "      <td>5075.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009-09-28 09:31:50</th>\n",
       "      <td>50.75</td>\n",
       "      <td>50.73</td>\n",
       "      <td>50.76</td>\n",
       "      <td>300</td>\n",
       "      <td>15225.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     price    bid    ask  vol  dollar_vol\n",
       "date                                                     \n",
       "2009-09-28 09:30:00  50.79  50.70  50.79  100     5079.00\n",
       "2009-09-28 09:30:00  50.71  50.70  50.79  638    32352.98\n",
       "2009-09-28 09:31:32  50.75  50.75  50.76  100     5075.00\n",
       "2009-09-28 09:31:33  50.75  50.72  50.75  100     5075.00\n",
       "2009-09-28 09:31:50  50.75  50.73  50.76  300    15225.00"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the frame loaded in the previous cell\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
